{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mongodb-developer/GenAI-Showcase/blob/main/notebooks/rag/naive_rag_implemenation_llamaindex.ipynb)\n",
        "\n",
        "[![View Article](https://img.shields.io/badge/View%20Article-blue)](https://www.mongodb.com/developer/products/atlas/rag-with-polm-stack-llamaindex-openai-mongodb/)\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "8zNZYwZED_72",
        "outputId": "0a3f72df-60be-471f-999c-61f86f51a31d"
      },
      "outputs": [],
      "source": [
        "# Install every dependency with a single `%pip` call: `%pip` targets the running\n",
        "# kernel's environment (a bare `!pip` shells out and may hit a different interpreter),\n",
        "# and one invocation lets pip resolve all the requirements together.\n",
        "# NOTE(review): versions are unpinned; pin them once a known-good set is confirmed.\n",
        "%pip install --quiet llama-index llama-index-vector-stores-mongodb llama-index-embeddings-openai pymongo datasets pandas"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "PNfX5evMgKbB",
        "outputId": "87d19aef-7b9f-4f57-d0d8-dc6246348da4"
      },
      "outputs": [],
      "source": [
        "import getpass\n",
        "import os\n",
        "\n",
        "# Read the key interactively instead of hardcoding it in the notebook: `%env VAR=value`\n",
        "# echoes the value into the cell output, which would persist a real secret in the saved file.\n",
        "# An OPENAI_API_KEY that is already set in the environment is left untouched.\n",
        "if not os.environ.get(\"OPENAI_API_KEY\"):\n",
        "    os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API key: \")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 3,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 449
        },
        "id": "aHXn_O1OEqCs",
        "outputId": "7a6ee822-13dc-4710-f4c5-5082ae37ad3c"
      },
      "outputs": [
        {
          "data": {
            "text/html": [
              "\n",
              "  <div id=\"df-f7e22752-1153-4a8a-9d36-4022c4b42129\" class=\"colab-df-container\">\n",
              "    <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>awards</th>\n",
              "      <th>metacritic</th>\n",
              "      <th>rated</th>\n",
              "      <th>fullplot</th>\n",
              "      <th>title</th>\n",
              "      <th>writers</th>\n",
              "      <th>languages</th>\n",
              "      <th>plot</th>\n",
              "      <th>plot_embedding</th>\n",
              "      <th>runtime</th>\n",
              "      <th>countries</th>\n",
              "      <th>genres</th>\n",
              "      <th>directors</th>\n",
              "      <th>cast</th>\n",
              "      <th>type</th>\n",
              "      <th>imdb</th>\n",
              "      <th>poster</th>\n",
              "      <th>num_mflix_comments</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>{'nominations': 0, 'text': '1 win.', 'wins': 1}</td>\n",
              "      <td>NaN</td>\n",
              "      <td>None</td>\n",
              "      <td>Young Pauline is left a lot of money when her ...</td>\n",
              "      <td>The Perils of Pauline</td>\n",
              "      <td>[Charles W. Goddard (screenplay), Basil Dickey...</td>\n",
              "      <td>[English]</td>\n",
              "      <td>Young Pauline is left a lot of money when her ...</td>\n",
              "      <td>[0.00072939653, -0.026834568, 0.013515796, -0....</td>\n",
              "      <td>199.0</td>\n",
              "      <td>[USA]</td>\n",
              "      <td>[Action]</td>\n",
              "      <td>[Louis J. Gasnier, Donald MacKenzie]</td>\n",
              "      <td>[Pearl White, Crane Wilbur, Paul Panzer, Edwar...</td>\n",
              "      <td>movie</td>\n",
              "      <td>{'id': 4465, 'rating': 7.6, 'votes': 744}</td>\n",
              "      <td>https://m.media-amazon.com/images/M/MV5BMzgxOD...</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>{'nominations': 1, 'text': '1 nomination.', 'w...</td>\n",
              "      <td>NaN</td>\n",
              "      <td>TV-G</td>\n",
              "      <td>As a penniless man worries about how he will m...</td>\n",
              "      <td>From Hand to Mouth</td>\n",
              "      <td>[H.M. Walker (titles)]</td>\n",
              "      <td>[English]</td>\n",
              "      <td>A penniless young man tries to save an heiress...</td>\n",
              "      <td>[-0.022837115, -0.022941574, 0.014937485, -0.0...</td>\n",
              "      <td>22.0</td>\n",
              "      <td>[USA]</td>\n",
              "      <td>[Comedy, Short, Action]</td>\n",
              "      <td>[Alfred J. Goulding, Hal Roach]</td>\n",
              "      <td>[Harold Lloyd, Mildred Davis, 'Snub' Pollard, ...</td>\n",
              "      <td>movie</td>\n",
              "      <td>{'id': 10146, 'rating': 7.0, 'votes': 639}</td>\n",
              "      <td>https://m.media-amazon.com/images/M/MV5BNzE1OW...</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>{'nominations': 0, 'text': '1 win.', 'wins': 1}</td>\n",
              "      <td>NaN</td>\n",
              "      <td>None</td>\n",
              "      <td>Michael \"Beau\" Geste leaves England in disgrac...</td>\n",
              "      <td>Beau Geste</td>\n",
              "      <td>[Herbert Brenon (adaptation), John Russell (ad...</td>\n",
              "      <td>[English]</td>\n",
              "      <td>Michael \"Beau\" Geste leaves England in disgrac...</td>\n",
              "      <td>[0.00023330493, -0.028511643, 0.014653289, -0....</td>\n",
              "      <td>101.0</td>\n",
              "      <td>[USA]</td>\n",
              "      <td>[Action, Adventure, Drama]</td>\n",
              "      <td>[Herbert Brenon]</td>\n",
              "      <td>[Ronald Colman, Neil Hamilton, Ralph Forbes, A...</td>\n",
              "      <td>movie</td>\n",
              "      <td>{'id': 16634, 'rating': 6.9, 'votes': 222}</td>\n",
              "      <td>None</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>{'nominations': 0, 'text': '1 win.', 'wins': 1}</td>\n",
              "      <td>NaN</td>\n",
              "      <td>None</td>\n",
              "      <td>A nobleman vows to avenge the death of his fat...</td>\n",
              "      <td>The Black Pirate</td>\n",
              "      <td>[Douglas Fairbanks (story), Jack Cunningham (a...</td>\n",
              "      <td>None</td>\n",
              "      <td>Seeking revenge, an athletic young man joins t...</td>\n",
              "      <td>[-0.005927917, -0.033394486, 0.0015323418, -0....</td>\n",
              "      <td>88.0</td>\n",
              "      <td>[USA]</td>\n",
              "      <td>[Adventure, Action]</td>\n",
              "      <td>[Albert Parker]</td>\n",
              "      <td>[Billie Dove, Tempe Pigott, Donald Crisp, Sam ...</td>\n",
              "      <td>movie</td>\n",
              "      <td>{'id': 16654, 'rating': 7.2, 'votes': 1146}</td>\n",
              "      <td>https://m.media-amazon.com/images/M/MV5BMzU0ND...</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>{'nominations': 1, 'text': '1 nomination.', 'w...</td>\n",
              "      <td>NaN</td>\n",
              "      <td>PASSED</td>\n",
              "      <td>The Uptown Boy, J. Harold Manners (Lloyd) is a...</td>\n",
              "      <td>For Heaven's Sake</td>\n",
              "      <td>[Ted Wilde (story), John Grey (story), Clyde B...</td>\n",
              "      <td>[English]</td>\n",
              "      <td>An irresponsible young millionaire changes his...</td>\n",
              "      <td>[-0.0059373598, -0.026604708, -0.0070914757, -...</td>\n",
              "      <td>58.0</td>\n",
              "      <td>[USA]</td>\n",
              "      <td>[Action, Comedy, Romance]</td>\n",
              "      <td>[Sam Taylor]</td>\n",
              "      <td>[Harold Lloyd, Jobyna Ralston, Noah Young, Jim...</td>\n",
              "      <td>movie</td>\n",
              "      <td>{'id': 16895, 'rating': 7.6, 'votes': 918}</td>\n",
              "      <td>https://m.media-amazon.com/images/M/MV5BMTcxMT...</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "    <div class=\"colab-df-buttons\">\n",
              "\n",
              "  <div class=\"colab-df-container\">\n",
              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-f7e22752-1153-4a8a-9d36-4022c4b42129')\"\n",
              "            title=\"Convert this dataframe to an interactive table.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "\n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    .colab-df-buttons div {\n",
              "      margin-bottom: 4px;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "    <script>\n",
              "      const buttonEl =\n",
              "        document.querySelector('#df-f7e22752-1153-4a8a-9d36-4022c4b42129 button.colab-df-convert');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      async function convertToInteractive(key) {\n",
              "        const element = document.querySelector('#df-f7e22752-1153-4a8a-9d36-4022c4b42129');\n",
              "        const dataTable =\n",
              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                    [key], {});\n",
              "        if (!dataTable) return;\n",
              "\n",
              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "          + ' to learn more about interactive tables.';\n",
              "        element.innerHTML = '';\n",
              "        dataTable['output_type'] = 'display_data';\n",
              "        await google.colab.output.renderOutput(dataTable, element);\n",
              "        const docLink = document.createElement('div');\n",
              "        docLink.innerHTML = docLinkHtml;\n",
              "        element.appendChild(docLink);\n",
              "      }\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "\n",
              "<div id=\"df-849e98ed-14a9-4812-93e8-59b2f1a58f95\">\n",
              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-849e98ed-14a9-4812-93e8-59b2f1a58f95')\"\n",
              "            title=\"Suggest charts\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "     width=\"24px\">\n",
              "    <g>\n",
              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
              "    </g>\n",
              "</svg>\n",
              "  </button>\n",
              "\n",
              "<style>\n",
              "  .colab-df-quickchart {\n",
              "      --bg-color: #E8F0FE;\n",
              "      --fill-color: #1967D2;\n",
              "      --hover-bg-color: #E2EBFA;\n",
              "      --hover-fill-color: #174EA6;\n",
              "      --disabled-fill-color: #AAA;\n",
              "      --disabled-bg-color: #DDD;\n",
              "  }\n",
              "\n",
              "  [theme=dark] .colab-df-quickchart {\n",
              "      --bg-color: #3B4455;\n",
              "      --fill-color: #D2E3FC;\n",
              "      --hover-bg-color: #434B5C;\n",
              "      --hover-fill-color: #FFFFFF;\n",
              "      --disabled-bg-color: #3B4455;\n",
              "      --disabled-fill-color: #666;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart {\n",
              "    background-color: var(--bg-color);\n",
              "    border: none;\n",
              "    border-radius: 50%;\n",
              "    cursor: pointer;\n",
              "    display: none;\n",
              "    fill: var(--fill-color);\n",
              "    height: 32px;\n",
              "    padding: 0;\n",
              "    width: 32px;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart:hover {\n",
              "    background-color: var(--hover-bg-color);\n",
              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "    fill: var(--button-hover-fill-color);\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart-complete:disabled,\n",
              "  .colab-df-quickchart-complete:disabled:hover {\n",
              "    background-color: var(--disabled-bg-color);\n",
              "    fill: var(--disabled-fill-color);\n",
              "    box-shadow: none;\n",
              "  }\n",
              "\n",
              "  .colab-df-spinner {\n",
              "    border: 2px solid var(--fill-color);\n",
              "    border-color: transparent;\n",
              "    border-bottom-color: var(--fill-color);\n",
              "    animation:\n",
              "      spin 1s steps(1) infinite;\n",
              "  }\n",
              "\n",
              "  @keyframes spin {\n",
              "    0% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "      border-left-color: var(--fill-color);\n",
              "    }\n",
              "    20% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    30% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    40% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    60% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    80% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "    90% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "  }\n",
              "</style>\n",
              "\n",
              "  <script>\n",
              "    async function quickchart(key) {\n",
              "      const quickchartButtonEl =\n",
              "        document.querySelector('#' + key + ' button');\n",
              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
              "      try {\n",
              "        const charts = await google.colab.kernel.invokeFunction(\n",
              "            'suggestCharts', [key], {});\n",
              "      } catch (error) {\n",
              "        console.error('Error during call to suggestCharts:', error);\n",
              "      }\n",
              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
              "    }\n",
              "    (() => {\n",
              "      let quickchartButtonEl =\n",
              "        document.querySelector('#df-849e98ed-14a9-4812-93e8-59b2f1a58f95 button');\n",
              "      quickchartButtonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "    })();\n",
              "  </script>\n",
              "</div>\n",
              "    </div>\n",
              "  </div>\n"
            ],
            "text/plain": [
              "                                              awards  metacritic   rated  \\\n",
              "0    {'nominations': 0, 'text': '1 win.', 'wins': 1}         NaN    None   \n",
              "1  {'nominations': 1, 'text': '1 nomination.', 'w...         NaN    TV-G   \n",
              "2    {'nominations': 0, 'text': '1 win.', 'wins': 1}         NaN    None   \n",
              "3    {'nominations': 0, 'text': '1 win.', 'wins': 1}         NaN    None   \n",
              "4  {'nominations': 1, 'text': '1 nomination.', 'w...         NaN  PASSED   \n",
              "\n",
              "                                            fullplot                  title  \\\n",
              "0  Young Pauline is left a lot of money when her ...  The Perils of Pauline   \n",
              "1  As a penniless man worries about how he will m...     From Hand to Mouth   \n",
              "2  Michael \"Beau\" Geste leaves England in disgrac...             Beau Geste   \n",
              "3  A nobleman vows to avenge the death of his fat...       The Black Pirate   \n",
              "4  The Uptown Boy, J. Harold Manners (Lloyd) is a...      For Heaven's Sake   \n",
              "\n",
              "                                             writers  languages  \\\n",
              "0  [Charles W. Goddard (screenplay), Basil Dickey...  [English]   \n",
              "1                             [H.M. Walker (titles)]  [English]   \n",
              "2  [Herbert Brenon (adaptation), John Russell (ad...  [English]   \n",
              "3  [Douglas Fairbanks (story), Jack Cunningham (a...       None   \n",
              "4  [Ted Wilde (story), John Grey (story), Clyde B...  [English]   \n",
              "\n",
              "                                                plot  \\\n",
              "0  Young Pauline is left a lot of money when her ...   \n",
              "1  A penniless young man tries to save an heiress...   \n",
              "2  Michael \"Beau\" Geste leaves England in disgrac...   \n",
              "3  Seeking revenge, an athletic young man joins t...   \n",
              "4  An irresponsible young millionaire changes his...   \n",
              "\n",
              "                                      plot_embedding  runtime countries  \\\n",
              "0  [0.00072939653, -0.026834568, 0.013515796, -0....    199.0     [USA]   \n",
              "1  [-0.022837115, -0.022941574, 0.014937485, -0.0...     22.0     [USA]   \n",
              "2  [0.00023330493, -0.028511643, 0.014653289, -0....    101.0     [USA]   \n",
              "3  [-0.005927917, -0.033394486, 0.0015323418, -0....     88.0     [USA]   \n",
              "4  [-0.0059373598, -0.026604708, -0.0070914757, -...     58.0     [USA]   \n",
              "\n",
              "                       genres                             directors  \\\n",
              "0                    [Action]  [Louis J. Gasnier, Donald MacKenzie]   \n",
              "1     [Comedy, Short, Action]       [Alfred J. Goulding, Hal Roach]   \n",
              "2  [Action, Adventure, Drama]                      [Herbert Brenon]   \n",
              "3         [Adventure, Action]                       [Albert Parker]   \n",
              "4   [Action, Comedy, Romance]                          [Sam Taylor]   \n",
              "\n",
              "                                                cast   type  \\\n",
              "0  [Pearl White, Crane Wilbur, Paul Panzer, Edwar...  movie   \n",
              "1  [Harold Lloyd, Mildred Davis, 'Snub' Pollard, ...  movie   \n",
              "2  [Ronald Colman, Neil Hamilton, Ralph Forbes, A...  movie   \n",
              "3  [Billie Dove, Tempe Pigott, Donald Crisp, Sam ...  movie   \n",
              "4  [Harold Lloyd, Jobyna Ralston, Noah Young, Jim...  movie   \n",
              "\n",
              "                                          imdb  \\\n",
              "0    {'id': 4465, 'rating': 7.6, 'votes': 744}   \n",
              "1   {'id': 10146, 'rating': 7.0, 'votes': 639}   \n",
              "2   {'id': 16634, 'rating': 6.9, 'votes': 222}   \n",
              "3  {'id': 16654, 'rating': 7.2, 'votes': 1146}   \n",
              "4   {'id': 16895, 'rating': 7.6, 'votes': 918}   \n",
              "\n",
              "                                              poster  num_mflix_comments  \n",
              "0  https://m.media-amazon.com/images/M/MV5BMzgxOD...                   0  \n",
              "1  https://m.media-amazon.com/images/M/MV5BNzE1OW...                   0  \n",
              "2                                               None                   0  \n",
              "3  https://m.media-amazon.com/images/M/MV5BMzU0ND...                   1  \n",
              "4  https://m.media-amazon.com/images/M/MV5BMTcxMT...                   0  "
            ]
          },
          "execution_count": 3,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "import pandas as pd\n",
        "from datasets import load_dataset\n",
        "\n",
        "# https://huggingface.co/datasets/MongoDB/embedded_movies\n",
        "dataset = load_dataset(\"MongoDB/embedded_movies\")\n",
        "\n",
        "# Convert the dataset to a pandas dataframe\n",
        "dataset_df = pd.DataFrame(dataset[\"train\"])\n",
        "\n",
        "dataset_df.head(5)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 4,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 745
        },
        "id": "Cp8QPlDWqfFP",
        "outputId": "082aa033-8dec-4499-bae7-334d730a4c95"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "\n",
            "Number of missing values in each column after removal:\n",
            "awards                  0\n",
            "metacritic            893\n",
            "rated                 279\n",
            "fullplot                0\n",
            "title                   0\n",
            "writers                13\n",
            "languages               1\n",
            "plot                    0\n",
            "plot_embedding          1\n",
            "runtime                14\n",
            "countries               0\n",
            "genres                  0\n",
            "directors              12\n",
            "cast                    1\n",
            "type                    0\n",
            "imdb                    0\n",
            "poster                 78\n",
            "num_mflix_comments      0\n",
            "dtype: int64\n"
          ]
        },
        {
          "data": {
            "text/html": [
              "\n",
              "  <div id=\"df-2c7cd9d9-da04-41e3-a351-93665993ec1b\" class=\"colab-df-container\">\n",
              "    <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>awards</th>\n",
              "      <th>metacritic</th>\n",
              "      <th>rated</th>\n",
              "      <th>fullplot</th>\n",
              "      <th>title</th>\n",
              "      <th>writers</th>\n",
              "      <th>languages</th>\n",
              "      <th>plot</th>\n",
              "      <th>runtime</th>\n",
              "      <th>countries</th>\n",
              "      <th>genres</th>\n",
              "      <th>directors</th>\n",
              "      <th>cast</th>\n",
              "      <th>type</th>\n",
              "      <th>imdb</th>\n",
              "      <th>poster</th>\n",
              "      <th>num_mflix_comments</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>{'nominations': 0, 'text': '1 win.', 'wins': 1}</td>\n",
              "      <td>NaN</td>\n",
              "      <td>None</td>\n",
              "      <td>Young Pauline is left a lot of money when her ...</td>\n",
              "      <td>The Perils of Pauline</td>\n",
              "      <td>[Charles W. Goddard (screenplay), Basil Dickey...</td>\n",
              "      <td>[English]</td>\n",
              "      <td>Young Pauline is left a lot of money when her ...</td>\n",
              "      <td>199.0</td>\n",
              "      <td>[USA]</td>\n",
              "      <td>[Action]</td>\n",
              "      <td>[Louis J. Gasnier, Donald MacKenzie]</td>\n",
              "      <td>[Pearl White, Crane Wilbur, Paul Panzer, Edwar...</td>\n",
              "      <td>movie</td>\n",
              "      <td>{'id': 4465, 'rating': 7.6, 'votes': 744}</td>\n",
              "      <td>https://m.media-amazon.com/images/M/MV5BMzgxOD...</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>{'nominations': 1, 'text': '1 nomination.', 'w...</td>\n",
              "      <td>NaN</td>\n",
              "      <td>TV-G</td>\n",
              "      <td>As a penniless man worries about how he will m...</td>\n",
              "      <td>From Hand to Mouth</td>\n",
              "      <td>[H.M. Walker (titles)]</td>\n",
              "      <td>[English]</td>\n",
              "      <td>A penniless young man tries to save an heiress...</td>\n",
              "      <td>22.0</td>\n",
              "      <td>[USA]</td>\n",
              "      <td>[Comedy, Short, Action]</td>\n",
              "      <td>[Alfred J. Goulding, Hal Roach]</td>\n",
              "      <td>[Harold Lloyd, Mildred Davis, 'Snub' Pollard, ...</td>\n",
              "      <td>movie</td>\n",
              "      <td>{'id': 10146, 'rating': 7.0, 'votes': 639}</td>\n",
              "      <td>https://m.media-amazon.com/images/M/MV5BNzE1OW...</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>{'nominations': 0, 'text': '1 win.', 'wins': 1}</td>\n",
              "      <td>NaN</td>\n",
              "      <td>None</td>\n",
              "      <td>Michael \"Beau\" Geste leaves England in disgrac...</td>\n",
              "      <td>Beau Geste</td>\n",
              "      <td>[Herbert Brenon (adaptation), John Russell (ad...</td>\n",
              "      <td>[English]</td>\n",
              "      <td>Michael \"Beau\" Geste leaves England in disgrac...</td>\n",
              "      <td>101.0</td>\n",
              "      <td>[USA]</td>\n",
              "      <td>[Action, Adventure, Drama]</td>\n",
              "      <td>[Herbert Brenon]</td>\n",
              "      <td>[Ronald Colman, Neil Hamilton, Ralph Forbes, A...</td>\n",
              "      <td>movie</td>\n",
              "      <td>{'id': 16634, 'rating': 6.9, 'votes': 222}</td>\n",
              "      <td>None</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>{'nominations': 0, 'text': '1 win.', 'wins': 1}</td>\n",
              "      <td>NaN</td>\n",
              "      <td>None</td>\n",
              "      <td>A nobleman vows to avenge the death of his fat...</td>\n",
              "      <td>The Black Pirate</td>\n",
              "      <td>[Douglas Fairbanks (story), Jack Cunningham (a...</td>\n",
              "      <td>None</td>\n",
              "      <td>Seeking revenge, an athletic young man joins t...</td>\n",
              "      <td>88.0</td>\n",
              "      <td>[USA]</td>\n",
              "      <td>[Adventure, Action]</td>\n",
              "      <td>[Albert Parker]</td>\n",
              "      <td>[Billie Dove, Tempe Pigott, Donald Crisp, Sam ...</td>\n",
              "      <td>movie</td>\n",
              "      <td>{'id': 16654, 'rating': 7.2, 'votes': 1146}</td>\n",
              "      <td>https://m.media-amazon.com/images/M/MV5BMzU0ND...</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>{'nominations': 1, 'text': '1 nomination.', 'w...</td>\n",
              "      <td>NaN</td>\n",
              "      <td>PASSED</td>\n",
              "      <td>The Uptown Boy, J. Harold Manners (Lloyd) is a...</td>\n",
              "      <td>For Heaven's Sake</td>\n",
              "      <td>[Ted Wilde (story), John Grey (story), Clyde B...</td>\n",
              "      <td>[English]</td>\n",
              "      <td>An irresponsible young millionaire changes his...</td>\n",
              "      <td>58.0</td>\n",
              "      <td>[USA]</td>\n",
              "      <td>[Action, Comedy, Romance]</td>\n",
              "      <td>[Sam Taylor]</td>\n",
              "      <td>[Harold Lloyd, Jobyna Ralston, Noah Young, Jim...</td>\n",
              "      <td>movie</td>\n",
              "      <td>{'id': 16895, 'rating': 7.6, 'votes': 918}</td>\n",
              "      <td>https://m.media-amazon.com/images/M/MV5BMTcxMT...</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "    <div class=\"colab-df-buttons\">\n",
              "\n",
              "  <div class=\"colab-df-container\">\n",
              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-2c7cd9d9-da04-41e3-a351-93665993ec1b')\"\n",
              "            title=\"Convert this dataframe to an interactive table.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "\n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    .colab-df-buttons div {\n",
              "      margin-bottom: 4px;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "    <script>\n",
              "      const buttonEl =\n",
              "        document.querySelector('#df-2c7cd9d9-da04-41e3-a351-93665993ec1b button.colab-df-convert');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      async function convertToInteractive(key) {\n",
              "        const element = document.querySelector('#df-2c7cd9d9-da04-41e3-a351-93665993ec1b');\n",
              "        const dataTable =\n",
              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                    [key], {});\n",
              "        if (!dataTable) return;\n",
              "\n",
              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "          + ' to learn more about interactive tables.';\n",
              "        element.innerHTML = '';\n",
              "        dataTable['output_type'] = 'display_data';\n",
              "        await google.colab.output.renderOutput(dataTable, element);\n",
              "        const docLink = document.createElement('div');\n",
              "        docLink.innerHTML = docLinkHtml;\n",
              "        element.appendChild(docLink);\n",
              "      }\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "\n",
              "<div id=\"df-1f514031-4020-4cfe-8665-439465e516c2\">\n",
              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-1f514031-4020-4cfe-8665-439465e516c2')\"\n",
              "            title=\"Suggest charts\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "     width=\"24px\">\n",
              "    <g>\n",
              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
              "    </g>\n",
              "</svg>\n",
              "  </button>\n",
              "\n",
              "<style>\n",
              "  .colab-df-quickchart {\n",
              "      --bg-color: #E8F0FE;\n",
              "      --fill-color: #1967D2;\n",
              "      --hover-bg-color: #E2EBFA;\n",
              "      --hover-fill-color: #174EA6;\n",
              "      --disabled-fill-color: #AAA;\n",
              "      --disabled-bg-color: #DDD;\n",
              "  }\n",
              "\n",
              "  [theme=dark] .colab-df-quickchart {\n",
              "      --bg-color: #3B4455;\n",
              "      --fill-color: #D2E3FC;\n",
              "      --hover-bg-color: #434B5C;\n",
              "      --hover-fill-color: #FFFFFF;\n",
              "      --disabled-bg-color: #3B4455;\n",
              "      --disabled-fill-color: #666;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart {\n",
              "    background-color: var(--bg-color);\n",
              "    border: none;\n",
              "    border-radius: 50%;\n",
              "    cursor: pointer;\n",
              "    display: none;\n",
              "    fill: var(--fill-color);\n",
              "    height: 32px;\n",
              "    padding: 0;\n",
              "    width: 32px;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart:hover {\n",
              "    background-color: var(--hover-bg-color);\n",
              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "    fill: var(--button-hover-fill-color);\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart-complete:disabled,\n",
              "  .colab-df-quickchart-complete:disabled:hover {\n",
              "    background-color: var(--disabled-bg-color);\n",
              "    fill: var(--disabled-fill-color);\n",
              "    box-shadow: none;\n",
              "  }\n",
              "\n",
              "  .colab-df-spinner {\n",
              "    border: 2px solid var(--fill-color);\n",
              "    border-color: transparent;\n",
              "    border-bottom-color: var(--fill-color);\n",
              "    animation:\n",
              "      spin 1s steps(1) infinite;\n",
              "  }\n",
              "\n",
              "  @keyframes spin {\n",
              "    0% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "      border-left-color: var(--fill-color);\n",
              "    }\n",
              "    20% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    30% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    40% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    60% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    80% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "    90% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "  }\n",
              "</style>\n",
              "\n",
              "  <script>\n",
              "    async function quickchart(key) {\n",
              "      const quickchartButtonEl =\n",
              "        document.querySelector('#' + key + ' button');\n",
              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
              "      try {\n",
              "        const charts = await google.colab.kernel.invokeFunction(\n",
              "            'suggestCharts', [key], {});\n",
              "      } catch (error) {\n",
              "        console.error('Error during call to suggestCharts:', error);\n",
              "      }\n",
              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
              "    }\n",
              "    (() => {\n",
              "      let quickchartButtonEl =\n",
              "        document.querySelector('#df-1f514031-4020-4cfe-8665-439465e516c2 button');\n",
              "      quickchartButtonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "    })();\n",
              "  </script>\n",
              "</div>\n",
              "    </div>\n",
              "  </div>\n"
            ],
            "text/plain": [
              "                                              awards  metacritic   rated  \\\n",
              "0    {'nominations': 0, 'text': '1 win.', 'wins': 1}         NaN    None   \n",
              "1  {'nominations': 1, 'text': '1 nomination.', 'w...         NaN    TV-G   \n",
              "2    {'nominations': 0, 'text': '1 win.', 'wins': 1}         NaN    None   \n",
              "3    {'nominations': 0, 'text': '1 win.', 'wins': 1}         NaN    None   \n",
              "4  {'nominations': 1, 'text': '1 nomination.', 'w...         NaN  PASSED   \n",
              "\n",
              "                                            fullplot                  title  \\\n",
              "0  Young Pauline is left a lot of money when her ...  The Perils of Pauline   \n",
              "1  As a penniless man worries about how he will m...     From Hand to Mouth   \n",
              "2  Michael \"Beau\" Geste leaves England in disgrac...             Beau Geste   \n",
              "3  A nobleman vows to avenge the death of his fat...       The Black Pirate   \n",
              "4  The Uptown Boy, J. Harold Manners (Lloyd) is a...      For Heaven's Sake   \n",
              "\n",
              "                                             writers  languages  \\\n",
              "0  [Charles W. Goddard (screenplay), Basil Dickey...  [English]   \n",
              "1                             [H.M. Walker (titles)]  [English]   \n",
              "2  [Herbert Brenon (adaptation), John Russell (ad...  [English]   \n",
              "3  [Douglas Fairbanks (story), Jack Cunningham (a...       None   \n",
              "4  [Ted Wilde (story), John Grey (story), Clyde B...  [English]   \n",
              "\n",
              "                                                plot  runtime countries  \\\n",
              "0  Young Pauline is left a lot of money when her ...    199.0     [USA]   \n",
              "1  A penniless young man tries to save an heiress...     22.0     [USA]   \n",
              "2  Michael \"Beau\" Geste leaves England in disgrac...    101.0     [USA]   \n",
              "3  Seeking revenge, an athletic young man joins t...     88.0     [USA]   \n",
              "4  An irresponsible young millionaire changes his...     58.0     [USA]   \n",
              "\n",
              "                       genres                             directors  \\\n",
              "0                    [Action]  [Louis J. Gasnier, Donald MacKenzie]   \n",
              "1     [Comedy, Short, Action]       [Alfred J. Goulding, Hal Roach]   \n",
              "2  [Action, Adventure, Drama]                      [Herbert Brenon]   \n",
              "3         [Adventure, Action]                       [Albert Parker]   \n",
              "4   [Action, Comedy, Romance]                          [Sam Taylor]   \n",
              "\n",
              "                                                cast   type  \\\n",
              "0  [Pearl White, Crane Wilbur, Paul Panzer, Edwar...  movie   \n",
              "1  [Harold Lloyd, Mildred Davis, 'Snub' Pollard, ...  movie   \n",
              "2  [Ronald Colman, Neil Hamilton, Ralph Forbes, A...  movie   \n",
              "3  [Billie Dove, Tempe Pigott, Donald Crisp, Sam ...  movie   \n",
              "4  [Harold Lloyd, Jobyna Ralston, Noah Young, Jim...  movie   \n",
              "\n",
              "                                          imdb  \\\n",
              "0    {'id': 4465, 'rating': 7.6, 'votes': 744}   \n",
              "1   {'id': 10146, 'rating': 7.0, 'votes': 639}   \n",
              "2   {'id': 16634, 'rating': 6.9, 'votes': 222}   \n",
              "3  {'id': 16654, 'rating': 7.2, 'votes': 1146}   \n",
              "4   {'id': 16895, 'rating': 7.6, 'votes': 918}   \n",
              "\n",
              "                                              poster  num_mflix_comments  \n",
              "0  https://m.media-amazon.com/images/M/MV5BMzgxOD...                   0  \n",
              "1  https://m.media-amazon.com/images/M/MV5BNzE1OW...                   0  \n",
              "2                                               None                   0  \n",
              "3  https://m.media-amazon.com/images/M/MV5BMzU0ND...                   1  \n",
              "4  https://m.media-amazon.com/images/M/MV5BMTcxMT...                   0  "
            ]
          },
          "execution_count": 4,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "# Remove data point where fullplot coloumn is missing\n",
        "dataset_df = dataset_df.dropna(subset=[\"fullplot\"])\n",
        "print(\"\\nNumber of missing values in each column after removal:\")\n",
        "print(dataset_df.isnull().sum())\n",
        "\n",
        "# Remove the plot_embedding from each data point in the dataset as we are going to create new embeddings with the new OpenAI emebedding Model \"text-embedding-3-small\"\n",
        "dataset_df = dataset_df.drop(columns=[\"plot_embedding\"])\n",
        "\n",
        "dataset_df.head(5)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 5,
      "metadata": {
        "id": "sgy4TIskUmd7"
      },
      "outputs": [],
      "source": [
        "from llama_index.core.settings import Settings\n",
        "from llama_index.embeddings.openai import OpenAIEmbedding\n",
        "from llama_index.llms.openai import OpenAI\n",
        "\n",
        "embed_model = OpenAIEmbedding(model=\"text-embedding-3-small\", dimensions=256)\n",
        "llm = OpenAI()\n",
        "\n",
        "Settings.llm = llm\n",
        "Settings.embed_model = embed_model"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 6,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "7H2HSRM1q58X",
        "outputId": "c28d82b6-e091-4eb5-83de-a64d2a8bdaaa"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "\n",
            "The LLM sees this: \n",
            " Metadata: awards=>{\"nominations\": 0, \"text\": \"1 win.\", \"wins\": 1}\n",
            "rated=>None\n",
            "title=>The Perils of Pauline\n",
            "writers=>[\"Charles W. Goddard (screenplay)\", \"Basil Dickey (screenplay)\", \"Charles W. Goddard (novel)\", \"George B. Seitz\", \"Bertram Millhauser\"]\n",
            "languages=>[\"English\"]\n",
            "plot=>Young Pauline is left a lot of money when her wealthy uncle dies. However, her uncle's secretary has been named as her guardian until she marries, at which time she will officially take ...\n",
            "runtime=>199.0\n",
            "countries=>[\"USA\"]\n",
            "genres=>[\"Action\"]\n",
            "directors=>[\"Louis J. Gasnier\", \"Donald MacKenzie\"]\n",
            "cast=>[\"Pearl White\", \"Crane Wilbur\", \"Paul Panzer\", \"Edward Jos\\u00e8\"]\n",
            "type=>movie\n",
            "imdb=>{\"id\": 4465, \"rating\": 7.6, \"votes\": 744}\n",
            "poster=>https://m.media-amazon.com/images/M/MV5BMzgxODk1Mzk2Ml5BMl5BanBnXkFtZTgwMDg0NzkwMjE@._V1_SY1000_SX677_AL_.jpg\n",
            "num_mflix_comments=>0\n",
            "-----\n",
            "Content: Young Pauline is left a lot of money when her wealthy uncle dies. However, her uncle's secretary has been named as her guardian until she marries, at which time she will officially take possession of her inheritance. Meanwhile, her \"guardian\" and his confederates constantly come up with schemes to get rid of Pauline so that he can get his hands on the money himself.\n",
            "\n",
            "The Embedding model sees this: \n",
            " Metadata: awards=>{\"nominations\": 0, \"text\": \"1 win.\", \"wins\": 1}\n",
            "title=>The Perils of Pauline\n",
            "writers=>[\"Charles W. Goddard (screenplay)\", \"Basil Dickey (screenplay)\", \"Charles W. Goddard (novel)\", \"George B. Seitz\", \"Bertram Millhauser\"]\n",
            "languages=>[\"English\"]\n",
            "plot=>Young Pauline is left a lot of money when her wealthy uncle dies. However, her uncle's secretary has been named as her guardian until she marries, at which time she will officially take ...\n",
            "countries=>[\"USA\"]\n",
            "genres=>[\"Action\"]\n",
            "directors=>[\"Louis J. Gasnier\", \"Donald MacKenzie\"]\n",
            "cast=>[\"Pearl White\", \"Crane Wilbur\", \"Paul Panzer\", \"Edward Jos\\u00e8\"]\n",
            "type=>movie\n",
            "imdb=>{\"id\": 4465, \"rating\": 7.6, \"votes\": 744}\n",
            "-----\n",
            "Content: Young Pauline is left a lot of money when her wealthy uncle dies. However, her uncle's secretary has been named as her guardian until she marries, at which time she will officially take possession of her inheritance. Meanwhile, her \"guardian\" and his confederates constantly come up with schemes to get rid of Pauline so that he can get his hands on the money himself.\n"
          ]
        }
      ],
      "source": [
        "import json\n",
        "\n",
        "from llama_index.core import Document\n",
        "from llama_index.core.schema import MetadataMode\n",
        "\n",
        "# Convert the DataFrame to a JSON string representation\n",
        "documents_json = dataset_df.to_json(orient=\"records\")\n",
        "# Load the JSON string into a Python list of dictionaries\n",
        "documents_list = json.loads(documents_json)\n",
        "\n",
        "llama_documents = []\n",
        "\n",
        "for document in documents_list:\n",
        "    # Value for metadata must be one of (str, int, float, None)\n",
        "    document[\"writers\"] = json.dumps(document[\"writers\"])\n",
        "    document[\"languages\"] = json.dumps(document[\"languages\"])\n",
        "    document[\"genres\"] = json.dumps(document[\"genres\"])\n",
        "    document[\"cast\"] = json.dumps(document[\"cast\"])\n",
        "    document[\"directors\"] = json.dumps(document[\"directors\"])\n",
        "    document[\"countries\"] = json.dumps(document[\"countries\"])\n",
        "    document[\"imdb\"] = json.dumps(document[\"imdb\"])\n",
        "    document[\"awards\"] = json.dumps(document[\"awards\"])\n",
        "\n",
        "    # Create a Document object with the text and excluded metadata for llm and embedding models\n",
        "    llama_document = Document(\n",
        "        text=document[\"fullplot\"],\n",
        "        metadata=document,\n",
        "        excluded_llm_metadata_keys=[\"fullplot\", \"metacritic\"],\n",
        "        excluded_embed_metadata_keys=[\n",
        "            \"fullplot\",\n",
        "            \"metacritic\",\n",
        "            \"poster\",\n",
        "            \"num_mflix_comments\",\n",
        "            \"runtime\",\n",
        "            \"rated\",\n",
        "        ],\n",
        "        metadata_template=\"{key}=>{value}\",\n",
        "        text_template=\"Metadata: {metadata_str}\\n-----\\nContent: {content}\",\n",
        "    )\n",
        "\n",
        "    llama_documents.append(llama_document)\n",
        "\n",
        "# Observing an example of what the LLM and Embedding model receive as input\n",
        "print(\n",
        "    \"\\nThe LLM sees this: \\n\",\n",
        "    llama_documents[0].get_content(metadata_mode=MetadataMode.LLM),\n",
        ")\n",
        "print(\n",
        "    \"\\nThe Embedding model sees this: \\n\",\n",
        "    llama_documents[0].get_content(metadata_mode=MetadataMode.EMBED),\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 7,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "w30z93C72quV",
        "outputId": "959ded32-3db7-4dfb-ec8c-630d13e821c4"
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "Document(id_='93600db5-2bfc-4add-98ad-f46c7e453746', embedding=None, metadata={'awards': '{\"nominations\": 0, \"text\": \"1 win.\", \"wins\": 1}', 'metacritic': None, 'rated': None, 'fullplot': 'Young Pauline is left a lot of money when her wealthy uncle dies. However, her uncle\\'s secretary has been named as her guardian until she marries, at which time she will officially take possession of her inheritance. Meanwhile, her \"guardian\" and his confederates constantly come up with schemes to get rid of Pauline so that he can get his hands on the money himself.', 'title': 'The Perils of Pauline', 'writers': '[\"Charles W. Goddard (screenplay)\", \"Basil Dickey (screenplay)\", \"Charles W. Goddard (novel)\", \"George B. Seitz\", \"Bertram Millhauser\"]', 'languages': '[\"English\"]', 'plot': \"Young Pauline is left a lot of money when her wealthy uncle dies. However, her uncle's secretary has been named as her guardian until she marries, at which time she will officially take ...\", 'runtime': 199.0, 'countries': '[\"USA\"]', 'genres': '[\"Action\"]', 'directors': '[\"Louis J. Gasnier\", \"Donald MacKenzie\"]', 'cast': '[\"Pearl White\", \"Crane Wilbur\", \"Paul Panzer\", \"Edward Jos\\\\u00e8\"]', 'type': 'movie', 'imdb': '{\"id\": 4465, \"rating\": 7.6, \"votes\": 744}', 'poster': 'https://m.media-amazon.com/images/M/MV5BMzgxODk1Mzk2Ml5BMl5BanBnXkFtZTgwMDg0NzkwMjE@._V1_SY1000_SX677_AL_.jpg', 'num_mflix_comments': 0}, excluded_embed_metadata_keys=['fullplot', 'metacritic', 'poster', 'num_mflix_comments', 'runtime', 'rated'], excluded_llm_metadata_keys=['fullplot', 'metacritic'], relationships={}, text='Young Pauline is left a lot of money when her wealthy uncle dies. However, her uncle\\'s secretary has been named as her guardian until she marries, at which time she will officially take possession of her inheritance. 
Meanwhile, her \"guardian\" and his confederates constantly come up with schemes to get rid of Pauline so that he can get his hands on the money himself.', start_char_idx=None, end_char_idx=None, text_template='Metadata: {metadata_str}\\n-----\\nContent: {content}', metadata_template='{key}=>{value}', metadata_seperator='\\n')"
            ]
          },
          "execution_count": 7,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "llama_documents[0]"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 8,
      "metadata": {
        "id": "NEWJp6rnUYGU"
      },
      "outputs": [],
      "source": [
        "from llama_index.core.node_parser import SentenceSplitter\n",
        "\n",
        "parser = SentenceSplitter()\n",
        "nodes = parser.get_nodes_from_documents(llama_documents)\n",
        "\n",
        "for node in nodes:\n",
        "    node_embedding = embed_model.get_text_embedding(\n",
        "        node.get_content(metadata_mode=\"all\")\n",
        "    )\n",
        "    node.embedding = node_embedding"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "OxFfZgwaesXI"
      },
      "source": [
        "Ensure your databse, collection and vector store index is setup on MongoDB Atlas for the collection or the following step won't work appropriately on MongoDB.\n",
        "\n",
        "\n",
        " - For assistance with database cluster setup and obtaining the URI, refer to this [guide](https://www.mongodb.com/docs/guides/atlas/cluster/) for setting up a MongoDB cluster, and this [guide](https://www.mongodb.com/docs/guides/atlas/connection-string/) to get your connection string. \n",
        "\n",
        " - Once you have successfully created a cluster, create the database and collection within the MongoDB Atlas cluster by clicking “+ Create Database”. The database will be named movies, and the collection will be named movies_records.\n",
        "\n",
        " - Creating a vector search index within the movies_records collection is essential for efficient document retrieval from MongoDB into our development environment. To achieve this, refer to the official [guide](https://www.mongodb.com/docs/atlas/atlas-vector-search/create-index/) on vector search index creation.\n",
        "\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 9,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "pA1u9dB2FjJP",
        "outputId": "5dcfa5d2-7088-4378-f3e6-635b30809dbe"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Connection to MongoDB successful\n"
          ]
        }
      ],
      "source": [
        "import pymongo\n",
        "from google.colab import userdata\n",
        "\n",
        "\n",
        "def get_mongo_client(mongo_uri):\n",
        "    \"\"\"Establish connection to the MongoDB.\"\"\"\n",
        "    try:\n",
        "        client = pymongo.MongoClient(\n",
        "            mongo_uri, appname=\"devrel.showcase.naive_rag_llamaindex\"\n",
        "        )\n",
        "        print(\"Connection to MongoDB successful\")\n",
        "        return client\n",
        "    except pymongo.errors.ConnectionFailure as e:\n",
        "        print(f\"Connection failed: {e}\")\n",
        "        return None\n",
        "\n",
        "\n",
        "mongo_uri = userdata.get(\"MONGO_URI\")\n",
        "if not mongo_uri:\n",
        "    print(\"MONGO_URI not set in environment variables\")\n",
        "\n",
        "mongo_client = get_mongo_client(mongo_uri)\n",
        "\n",
        "DB_NAME = \"movies\"\n",
        "COLLECTION_NAME = \"movies_records\"\n",
        "\n",
        "db = mongo_client[DB_NAME]\n",
        "collection = db[COLLECTION_NAME]"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 10,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "W__uArA5edeb",
        "outputId": "1813a2aa-ac65-4959-a990-31f8589c0007"
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "DeleteResult({'n': 0, 'electionId': ObjectId('7fffffff000000000000000a'), 'opTime': {'ts': Timestamp(1708000722, 1), 't': 10}, 'ok': 1.0, '$clusterTime': {'clusterTime': Timestamp(1708000722, 1), 'signature': {'hash': b'\\xd8\\x1a\\xaci\\xf5EN+\\xe2\\xd1\\xb3y8.${u5P\\xf3', 'keyId': 7320226449804230661}}, 'operationTime': Timestamp(1708000722, 1)}, acknowledged=True)"
            ]
          },
          "execution_count": 10,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "# To ensure we are working with a fresh collection\n",
        "# delete any existing records in the collection\n",
        "collection.delete_many({})"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "FXZ8pZ7gQ3Ij",
        "outputId": "3a4aabca-a5bb-4bd3-da35-33c61b26c815"
      },
      "outputs": [],
      "source": [
        "from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch\n",
        "\n",
        "vector_store = MongoDBAtlasVectorSearch(\n",
        "    mongo_client,\n",
        "    db_name=DB_NAME,\n",
        "    collection_name=COLLECTION_NAME,\n",
        "    index_name=\"vector_index\",\n",
        ")\n",
        "vector_store.add(nodes)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 12,
      "metadata": {
        "id": "Z9zpscTmT6i0"
      },
      "outputs": [],
      "source": [
        "from llama_index.core import VectorStoreIndex\n",
        "\n",
        "index = VectorStoreIndex.from_vector_store(vector_store)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 14,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 153
        },
        "id": "_OmDF877Gl7O",
        "outputId": "cc033433-9bfa-4e57-b3e7-7085bce0b2df"
      },
      "outputs": [
        {
          "data": {
            "text/markdown": [
              "**`Final Response:`** The movie \"Romancing the Stone\" would be a suitable romantic movie for the Christmas season. It is a romantic adventure film that follows a romance writer who sets off on a dangerous adventure to rescue her kidnapped sister. The movie has elements of romance, adventure, and comedy, making it an entertaining choice for the holiday season. Additionally, the movie has received positive reviews and has been nominated for awards, indicating its quality."
            ],
            "text/plain": [
              "<IPython.core.display.Markdown object>"
            ]
          },
          "metadata": {},
          "output_type": "display_data"
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "[NodeWithScore(node=TextNode(id_='c6bbc236-e21d-49ab-b43d-db920b4946e6', embedding=None, metadata={'awards': '{\"nominations\": 2, \"text\": \"Nominated for 1 Oscar. Another 6 wins & 2 nominations.\", \"wins\": 7}', 'metacritic': None, 'rated': 'PG', 'fullplot': \"Joan Wilder, a mousy romance novelist, receives a treasure map in the mail from her recently murdered brother-in-law. Meanwhile, her sister Elaine is kidnapped in Colombia and the two criminals responsible demand that she travel to Colombia to exchange the map for her sister. Joan does, and quickly becomes lost in the jungle after being waylayed by Zolo, a vicious and corrupt Colombian cop who will stop at nothing to obtain the map. There, she meets an irreverent soldier-of-fortune named Jack Colton who agrees to bring her back to civilization. Together, they embark upon an adventure that could be straight out of Joan's novels.\", 'title': 'Romancing the Stone', 'writers': '[\"Diane Thomas\"]', 'languages': '[\"English\", \"Spanish\", \"French\"]', 'plot': 'A romance writer sets off to Colombia to ransom her kidnapped sister, and soon finds herself in the middle of a dangerous adventure.', 'runtime': 106.0, 'countries': '[\"USA\", \"Mexico\"]', 'genres': '[\"Action\", \"Adventure\", \"Comedy\"]', 'directors': '[\"Robert Zemeckis\"]', 'cast': '[\"Michael Douglas\", \"Kathleen Turner\", \"Danny DeVito\", \"Zack Norman\"]', 'type': 'movie', 'imdb': '{\"id\": 88011, \"rating\": 6.9, \"votes\": 59403}', 'poster': 'https://m.media-amazon.com/images/M/MV5BMDAwNjljMzEtMTc3Yy00NDg2LThjNDAtNjc0NGYyYjM2M2I1XkEyXkFqcGdeQXVyNDE5MTU2MDE@._V1_SY1000_SX677_AL_.jpg', 'num_mflix_comments': 0}, excluded_embed_metadata_keys=['fullplot', 'metacritic', 'poster', 'num_mflix_comments', 'runtime', 'rated'], excluded_llm_metadata_keys=['fullplot', 'metacritic'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='e50144b0-96ba-4a5a-b90a-3a2419f5b380', node_type=<ObjectType.DOCUMENT: '4'>, 
metadata={'awards': '{\"nominations\": 2, \"text\": \"Nominated for 1 Oscar. Another 6 wins & 2 nominations.\", \"wins\": 7}', 'metacritic': None, 'rated': 'PG', 'fullplot': \"Joan Wilder, a mousy romance novelist, receives a treasure map in the mail from her recently murdered brother-in-law. Meanwhile, her sister Elaine is kidnapped in Colombia and the two criminals responsible demand that she travel to Colombia to exchange the map for her sister. Joan does, and quickly becomes lost in the jungle after being waylayed by Zolo, a vicious and corrupt Colombian cop who will stop at nothing to obtain the map. There, she meets an irreverent soldier-of-fortune named Jack Colton who agrees to bring her back to civilization. Together, they embark upon an adventure that could be straight out of Joan's novels.\", 'title': 'Romancing the Stone', 'writers': '[\"Diane Thomas\"]', 'languages': '[\"English\", \"Spanish\", \"French\"]', 'plot': 'A romance writer sets off to Colombia to ransom her kidnapped sister, and soon finds herself in the middle of a dangerous adventure.', 'runtime': 106.0, 'countries': '[\"USA\", \"Mexico\"]', 'genres': '[\"Action\", \"Adventure\", \"Comedy\"]', 'directors': '[\"Robert Zemeckis\"]', 'cast': '[\"Michael Douglas\", \"Kathleen Turner\", \"Danny DeVito\", \"Zack Norman\"]', 'type': 'movie', 'imdb': '{\"id\": 88011, \"rating\": 6.9, \"votes\": 59403}', 'poster': 'https://m.media-amazon.com/images/M/MV5BMDAwNjljMzEtMTc3Yy00NDg2LThjNDAtNjc0NGYyYjM2M2I1XkEyXkFqcGdeQXVyNDE5MTU2MDE@._V1_SY1000_SX677_AL_.jpg', 'num_mflix_comments': 0}, hash='b984e4f203b7b67eae14afa890718adb800a5816661ac2edf412aa96fd7dc10b'), <NodeRelationship.PREVIOUS: '2'>: RelatedNodeInfo(node_id='f895e43a-038a-4a1c-8a82-0e22868e35d7', node_type=<ObjectType.TEXT: '1'>, metadata={'awards': '{\"nominations\": 1, \"text\": \"1 nomination.\", \"wins\": 0}', 'metacritic': None, 'rated': 'R', 'fullplot': \"Chicago psychiatrist Judd Stevens (Roger Moore) is suspected of murdering one of his 
patients when the man turns up stabbed to death in the middle of the city. After repeated attempts to convince cops Rod Steiger and Elliott Gould of his innocence, Dr.Stevens is forced to go after the real villains himself, and he finds himself up against one of the city's most notorious Mafia kingpins.\", 'title': 'The Naked Face', 'writers': '[\"Bryan Forbes\", \"Sidney Sheldon (novel)\"]', 'languages': '[\"English\"]', 'plot': 'Chicago psychiatrist Judd Stevens (Roger Moore) is suspected of murdering one of his patients when the man turns up stabbed to death in the middle of the city. After repeated attempts to ...', 'runtime': 103.0, 'countries': '[\"USA\"]', 'genres': '[\"Action\", \"Mystery\", \"Thriller\"]', 'directors': '[\"Bryan Forbes\"]', 'cast': '[\"Roger Moore\", \"Rod Steiger\", \"Elliott Gould\", \"Art Carney\"]', 'type': 'movie', 'imdb': '{\"id\": 87777, \"rating\": 5.3, \"votes\": 654}', 'poster': 'https://m.media-amazon.com/images/M/MV5BMTg0NDM4MTY0NV5BMl5BanBnXkFtZTcwNTcwOTc2NA@@._V1_SY1000_SX677_AL_.jpg', 'num_mflix_comments': 1}, hash='066e2b3d12c5fab61175f52dd625ec41fb1fce1fe6fe4c892774227c576fdbbd'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='e31f1142-c6b6-4183-b14b-1634166b9d1f', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='9b9127e21d18792749a7a35321e04d29b8d77f7b454b0133205f9de1090038b4')}, text=\"Joan Wilder, a mousy romance novelist, receives a treasure map in the mail from her recently murdered brother-in-law. Meanwhile, her sister Elaine is kidnapped in Colombia and the two criminals responsible demand that she travel to Colombia to exchange the map for her sister. Joan does, and quickly becomes lost in the jungle after being waylayed by Zolo, a vicious and corrupt Colombian cop who will stop at nothing to obtain the map. There, she meets an irreverent soldier-of-fortune named Jack Colton who agrees to bring her back to civilization. 
Together, they embark upon an adventure that could be straight out of Joan's novels.\", start_char_idx=0, end_char_idx=635, text_template='Metadata: {metadata_str}\\n-----\\nContent: {content}', metadata_template='{key}=>{value}', metadata_seperator='\\n'), score=0.7502920627593994),\n",
            " NodeWithScore(node=TextNode(id_='5c7cef95-79e3-4c96-a009-4154ea125240', embedding=None, metadata={'awards': '{\"nominations\": 2, \"text\": \"Nominated for 2 Oscars. Another 1 win & 2 nominations.\", \"wins\": 3}', 'metacritic': 64.0, 'rated': 'PG-13', 'fullplot': 'In 1880, four men travel together to the city of Silverado. They come across with many dangers before they finally engage the \"bad guys\" and bring peace and equality back to the city.', 'title': 'Silverado', 'writers': '[\"Lawrence Kasdan\", \"Mark Kasdan\"]', 'languages': '[\"English\"]', 'plot': 'A misfit bunch of friends come together to right the injustices which exist in a small town.', 'runtime': 133.0, 'countries': '[\"USA\"]', 'genres': '[\"Action\", \"Crime\", \"Drama\"]', 'directors': '[\"Lawrence Kasdan\"]', 'cast': '[\"Kevin Kline\", \"Scott Glenn\", \"Kevin Costner\", \"Danny Glover\"]', 'type': 'movie', 'imdb': '{\"id\": 90022, \"rating\": 7.2, \"votes\": 26415}', 'poster': 'https://m.media-amazon.com/images/M/MV5BYTljNTE5YmUtMGEyZi00ZjI4LWEzYjUtZDY2YWEwNzVmZjRkXkEyXkFqcGdeQXVyNTI4MjkwNjA@._V1_SY1000_SX677_AL_.jpg', 'num_mflix_comments': 1}, excluded_embed_metadata_keys=['fullplot', 'metacritic', 'poster', 'num_mflix_comments', 'runtime', 'rated'], excluded_llm_metadata_keys=['fullplot', 'metacritic'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='decbc30c-c17e-4ba4-bd1e-72dce4ce383a', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'awards': '{\"nominations\": 2, \"text\": \"Nominated for 2 Oscars. Another 1 win & 2 nominations.\", \"wins\": 3}', 'metacritic': 64.0, 'rated': 'PG-13', 'fullplot': 'In 1880, four men travel together to the city of Silverado. 
They come across with many dangers before they finally engage the \"bad guys\" and bring peace and equality back to the city.', 'title': 'Silverado', 'writers': '[\"Lawrence Kasdan\", \"Mark Kasdan\"]', 'languages': '[\"English\"]', 'plot': 'A misfit bunch of friends come together to right the injustices which exist in a small town.', 'runtime': 133.0, 'countries': '[\"USA\"]', 'genres': '[\"Action\", \"Crime\", \"Drama\"]', 'directors': '[\"Lawrence Kasdan\"]', 'cast': '[\"Kevin Kline\", \"Scott Glenn\", \"Kevin Costner\", \"Danny Glover\"]', 'type': 'movie', 'imdb': '{\"id\": 90022, \"rating\": 7.2, \"votes\": 26415}', 'poster': 'https://m.media-amazon.com/images/M/MV5BYTljNTE5YmUtMGEyZi00ZjI4LWEzYjUtZDY2YWEwNzVmZjRkXkEyXkFqcGdeQXVyNTI4MjkwNjA@._V1_SY1000_SX677_AL_.jpg', 'num_mflix_comments': 1}, hash='80b77d835c7dfad9d57d300cf69ba388704e6f282f49dc23106489db03b8b441'), <NodeRelationship.PREVIOUS: '2'>: RelatedNodeInfo(node_id='1c04fb7f-ff8f-4e8c-84f6-74c57251446a', node_type=<ObjectType.TEXT: '1'>, metadata={'awards': '{\"nominations\": 5, \"text\": \"Nominated for 3 Oscars. Another 2 wins & 5 nominations.\", \"wins\": 5}', 'metacritic': None, 'rated': 'R', 'fullplot': 'A hardened convict and a younger prisoner escape from a brutal prison in the middle of winter only to find themselves on an out-of-control train with a female railway worker while being pursued by the vengeful head of security.', 'title': 'Runaway Train', 'writers': '[\"Djordje Milicevic (screenplay)\", \"Paul Zindel (screenplay)\", \"Edward Bunker (screenplay)\", \"Akira Kurosawa (based on a screenplay by)\"]', 'languages': '[\"English\"]', 'plot': 'Two escaped convicts and a female railway worker find themselves trapped on a train with no brakes and nobody driving.', 'runtime': 111.0, 'countries': '[\"USA\"]', 'genres': '[\"Action\", \"Adventure\", \"Drama\"]', 'directors': '[\"Andrey Konchalovskiy\"]', 'cast': '[\"Jon Voight\", \"Eric Roberts\", \"Rebecca De Mornay\", \"Kyle T. 
Heffner\"]', 'type': 'movie', 'imdb': '{\"id\": 89941, \"rating\": 7.3, \"votes\": 19652}', 'poster': 'https://m.media-amazon.com/images/M/MV5BODQyYWU1NGUtNjEzYS00YmNhLTk1YWEtZDdlZGQzMTI4MTI1XkEyXkFqcGdeQXVyMTQxNzMzNDI@._V1_SY1000_SX677_AL_.jpg', 'num_mflix_comments': 0}, hash='378c16de972df97080db94775cd46e57f6a0dd5a7472b357e0285eed2e3b7775'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='5df9410b-6597-45f4-95d5-fee1db8737b1', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='77e93faace9b0e102635d3ca997ff27bc03dbba66eaa2d830f0634289d16d927')}, text='In 1880, four men travel together to the city of Silverado. They come across with many dangers before they finally engage the \"bad guys\" and bring peace and equality back to the city.', start_char_idx=0, end_char_idx=183, text_template='Metadata: {metadata_str}\\n-----\\nContent: {content}', metadata_template='{key}=>{value}', metadata_seperator='\\n'), score=0.7419796586036682),\n",
            " NodeWithScore(node=TextNode(id_='ff28e815-5db5-4963-a9b8-99c64716eb00', embedding=None, metadata={'awards': '{\"nominations\": 1, \"text\": \"1 nomination.\", \"wins\": 0}', 'metacritic': None, 'rated': 'PASSED', 'fullplot': \"Dick Powell stars as Haven, a government private investigator assigned to investigate the murders of two cavalrymen. Travelling incognito, Haven arrives in a small frontier outpost, where saloon singer Charlie controls all illegal activities. After making short work of Charlie's burly henchman, Haven gets a job at her gambling emporium, biding his time and gathering evidence against the gorgeous crime chieftain Cast as a philosophical bartender, Burl Ives is afforded at least one opportunity to sing.\", 'title': 'Station West', 'writers': '[\"Frank Fenton (screenplay)\", \"Winston Miller (screenplay)\", \"Luke Short (novel)\"]', 'languages': '[\"English\"]', 'plot': 'When two US cavalrymen transporting a gold shipment get killed, US Army Intelligence investigator John Haven goes undercover to a mining and logging town to find the killers.', 'runtime': 87.0, 'countries': '[\"USA\"]', 'genres': '[\"Action\", \"Mystery\", \"Romance\"]', 'directors': '[\"Sidney Lanfield\"]', 'cast': '[\"Dick Powell\", \"Jane Greer\", \"Agnes Moorehead\", \"Burl Ives\"]', 'type': 'movie', 'imdb': '{\"id\": 40835, \"rating\": 6.8, \"votes\": 578}', 'poster': 'https://m.media-amazon.com/images/M/MV5BN2U3YWJjOWItOWY3Yy00NTMxLTkxMGUtOTQ1MzEzODM2MjRjXkEyXkFqcGdeQXVyNTk1MTk0MDI@._V1_SY1000_SX677_AL_.jpg', 'num_mflix_comments': 1}, excluded_embed_metadata_keys=['fullplot', 'metacritic', 'poster', 'num_mflix_comments', 'runtime', 'rated'], excluded_llm_metadata_keys=['fullplot', 'metacritic'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='b04254ab-2edb-47c1-9412-646575747ca8', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'awards': '{\"nominations\": 1, \"text\": \"1 nomination.\", \"wins\": 0}', 'metacritic': None, 'rated': 
'PASSED', 'fullplot': \"Dick Powell stars as Haven, a government private investigator assigned to investigate the murders of two cavalrymen. Travelling incognito, Haven arrives in a small frontier outpost, where saloon singer Charlie controls all illegal activities. After making short work of Charlie's burly henchman, Haven gets a job at her gambling emporium, biding his time and gathering evidence against the gorgeous crime chieftain Cast as a philosophical bartender, Burl Ives is afforded at least one opportunity to sing.\", 'title': 'Station West', 'writers': '[\"Frank Fenton (screenplay)\", \"Winston Miller (screenplay)\", \"Luke Short (novel)\"]', 'languages': '[\"English\"]', 'plot': 'When two US cavalrymen transporting a gold shipment get killed, US Army Intelligence investigator John Haven goes undercover to a mining and logging town to find the killers.', 'runtime': 87.0, 'countries': '[\"USA\"]', 'genres': '[\"Action\", \"Mystery\", \"Romance\"]', 'directors': '[\"Sidney Lanfield\"]', 'cast': '[\"Dick Powell\", \"Jane Greer\", \"Agnes Moorehead\", \"Burl Ives\"]', 'type': 'movie', 'imdb': '{\"id\": 40835, \"rating\": 6.8, \"votes\": 578}', 'poster': 'https://m.media-amazon.com/images/M/MV5BN2U3YWJjOWItOWY3Yy00NTMxLTkxMGUtOTQ1MzEzODM2MjRjXkEyXkFqcGdeQXVyNTk1MTk0MDI@._V1_SY1000_SX677_AL_.jpg', 'num_mflix_comments': 1}, hash='90f541ac96dcffa4ac639e6ac25da415471164bf8d7930a29b6aed406d631ede'), <NodeRelationship.PREVIOUS: '2'>: RelatedNodeInfo(node_id='a48d8737-8615-48c1-9d4a-1ee127e34fb9', node_type=<ObjectType.TEXT: '1'>, metadata={'awards': '{\"nominations\": 1, \"text\": \"1 nomination.\", \"wins\": 0}', 'metacritic': None, 'rated': 'PASSED', 'fullplot': 'Jefty, owner of a roadhouse in a backwoods town, hires sultry, tough-talking torch singer Lily Stevens against the advice of his manager Pete Morgan. Jefty is smitten with Lily, who in turn exerts her charms on the more resistant Pete. 
When Pete finally falls for her and she turns down Jefty\\'s marriage proposal, they must face Jefty\\'s murderous jealousy and his twisted plots to \"punish\" the two.', 'title': 'Road House', 'writers': '[\"Edward Chodorov (screen play)\", \"Margaret Gruen (story)\", \"Oscar Saul (story)\"]', 'languages': '[\"English\"]', 'plot': 'A night club owner becomes infatuated with a torch singer and frames his best friend/manager for embezzlement when the chanteuse falls in love with him.', 'runtime': 95.0, 'countries': '[\"USA\"]', 'genres': '[\"Action\", \"Drama\", \"Film-Noir\"]', 'directors': '[\"Jean Negulesco\"]', 'cast': '[\"Ida Lupino\", \"Cornel Wilde\", \"Celeste Holm\", \"Richard Widmark\"]', 'type': 'movie', 'imdb': '{\"id\": 40740, \"rating\": 7.3, \"votes\": 1353}', 'poster': 'https://m.media-amazon.com/images/M/MV5BMjc1ZTNkM2UtYzY3Yi00ZWZmLTljYmEtNjYxZDNmYzk2ZjkzXkEyXkFqcGdeQXVyMjUxODE0MDY@._V1_SY1000_SX677_AL_.jpg', 'num_mflix_comments': 2}, hash='040b4a77fcc8fbb5347620e99a217d67b85dcdbd370d91bd23877722a499079f'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='75f37fbc-d75e-4a76-b86f-f15d9260afd1', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='9941706d03783561f3fc3200c26527493a62307f8532dcda60b20948c886b330')}, text=\"Dick Powell stars as Haven, a government private investigator assigned to investigate the murders of two cavalrymen. Travelling incognito, Haven arrives in a small frontier outpost, where saloon singer Charlie controls all illegal activities. After making short work of Charlie's burly henchman, Haven gets a job at her gambling emporium, biding his time and gathering evidence against the gorgeous crime chieftain Cast as a philosophical bartender, Burl Ives is afforded at least one opportunity to sing.\", start_char_idx=0, end_char_idx=505, text_template='Metadata: {metadata_str}\\n-----\\nContent: {content}', metadata_template='{key}=>{value}', metadata_seperator='\\n'), score=0.7337073087692261)]\n"
          ]
        }
      ],
      "source": [
        "import pprint\n",
        "\n",
        "from llama_index.core.response.notebook_utils import display_response\n",
        "\n",
        "query_engine = index.as_query_engine(similarity_top_k=3)\n",
        "\n",
        "query = \"Recommend a romantic movie suitable for the christmas season and justify your selecton\"\n",
        "\n",
        "response = query_engine.query(query)\n",
        "display_response(response)\n",
        "pprint.pprint(response.source_nodes)"
      ]
    }
  ],
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    },
    "widgets": {
      "application/vnd.jupyter.widget-state+json": {
        "state": {}
      }
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
